Librairies utilisées

library(ggplot2)
library(plot3D)
library(stringr)
require(MASS)
## Loading required package: MASS

Vérification des priors utilisés

Fonction

#Data frame prior
data.frame.prior = function(seq_ne, max_simu){

  #Créer matrice vide qui contiendra les stats
  mat_prior = matrix(data = NA, nrow = length(seq_ne)*max_simu, ncol = 4)
  #Attribution nom colonnes selon appelation stat par MetHis
  colnames(mat_prior) = c("Ne", "simu", "s1", "s2")  
  mat_prior = as.data.frame(mat_prior)
  #Compteur pour indiquer les lignes à remplir
  cpt = 0
  
  for (ne in seq_ne) {
    dir_ne = str_c("Ne", toString(ne), "/") #Répertoire de taille effective
    row_min = cpt*max_simu+1
    row_max = (cpt+1)*max_simu
    mat_prior[row_min:row_max,1] = toString(ne)
    cpt = cpt+1
    
    for (nb_simu in 1:max_simu) {
      mat_prior[row_min+nb_simu-1,2] = nb_simu
      dir_simu = str_c("simu_", toString(nb_simu), "/")
      file_path = str_c(dir_ne, dir_simu, "simu_", nb_simu, ".txt")
      df_tmp = read.table(file_path, header = TRUE)
      mat_prior[row_min+nb_simu-1,3:4] = df_tmp[1,5:6]
    }
  }
  

  #Passage des Ne en facteur
  mat_prior$Ne = factor(as.factor(mat_prior$Ne), levels = as.character(seq_ne))
  #Passage simuulations en entier
  mat_prior$simu = as.integer(mat_prior$simu)  

  return(mat_prior)
}

Application

#seq_1024_16384 = 2**seq(10,14,1)
#seq_60_100 = 10*seq(5,10,1)
#seq_tot = sort(c(seq_60_100, seq_64_8192))
#mat_prior_1024_16384 = data.frame.prior(seq_1024_16384, 100)
#summary(mat_prior_1024_16384$s1)

Résumé des lectures de stat

Fonctions utilisées

#Data frame construction
data.frame.stat = function(seq_ne, max_gen, max_simu){

  #Créer matrice vide qui contiendra les stats
  mat_stat = matrix(data = NA, nrow = length(seq_ne)*max_gen*max_simu, ncol = 63)
  mat_stat = as.data.frame(mat_stat)
  #Compteur pour indiquer les lignes à remplir
  cpt = 0

  for (ne in seq_ne) {
    if (dir.exists(str_c("Ne", toString(ne), "/"))) {
      dir_ne = str_c("Ne", toString(ne), "/") #Répertoire de taille effective
      row_min = cpt*max_gen*max_simu+1 #Ligne min. où on écrit les stats pour le Ne donné
      row_max = (cpt+1)*max_gen*max_simu         #Ligne max. où on écrit les stats  
      mat_stat[row_min:row_max,1] = toString(ne) #Ecriture du Ne correspondant pour les stats qui seront écrites
      cpt = cpt+1                             #Incrémentation du compteur
  
      for (nb_simu in 1:max_simu) {
        dir_simu = str_c("simu_", toString(nb_simu), "/")
        row_min_simu = (nb_simu-1)*max_gen + row_min
        row_max_simu = nb_simu*max_gen + row_min -1
        mat_stat[row_min_simu:row_max_simu,2] = nb_simu
        file_path = str_c(dir_ne, dir_simu, "final_sumstats.txt")
        file_stat = read.table(file_path, header = TRUE) #Lecture fichier stat résumées
        #Suppression 1ère colonne, contenant uniquement chiffres pour tri en bash. 
        mat_stat[row_min_simu:row_max_simu,3:63] = file_stat
      }
    }
  }
  

  #Attribution nom colonnes selon appelation stat par MetHis
  colnames(mat_stat) = c("Ne", "simu", names(file_stat))
  #Passage des Ne en facteur
  mat_stat$Ne = factor(as.factor(mat_stat$Ne), levels = as.character(seq_ne))
  #Passage simulations en entier
  mat_stat$simu = as.integer(mat_stat$simu)
  #Passage générations en entier
  mat_stat$Generation = as.integer(mat_stat$Generation)
  #Passage stats en double
  for (numcol in 4:ncol(mat_stat)) {
    mat_stat[,numcol] = as.double(mat_stat[,numcol])
  }
  
  return(mat_stat)
}
#Passage en moyenne
data.frame.mean = function(df_stat, max_gen, seq_ne){
  
  lrow = length(seq_ne)
  df_mean = matrix(data = NA, nrow = lrow*max_gen, ncol = 62 )
  df_mean = as.data.frame(df_mean)
  cpt = 0

  for (ne in seq_ne) {
    df_tmp = df_stat[which(df_stat$Ne == ne),]
    
    row_min = cpt*max_gen+1
    row_max = (cpt+1)*max_gen
    df_mean[row_min:row_max, 1] = toString(ne) 
    cpt = cpt+1
    
    for (gen in 0:(max_gen-1) ) {
      df_mean[row_min+gen, 2] = gen
      vec_tmp = apply(df_tmp[which(df_tmp$Generation == gen), 4:63], 2, mean)
      df_mean[row_min+gen, 3:62] = vec_tmp
    }
  }
  
  colnames(df_mean) = colnames(df_stat)[-2]
  df_mean$Ne = factor(as.factor(df_mean$Ne), levels = seq_ne )
  return(df_mean)
}
#Plot function
#Affichage d'une stat au cours des générations
plot_stat_gen = function(df, gen, stat, color_col, titre, ligne = FALSE, legd = TRUE){
  p = ggplot(df, aes(x = gen, y = stat, color = color_col)) + ggtitle(titre)
  
  if (ligne) {
    p = p + geom_point(size = 0.1, show.legend = legd)+ geom_line(show.legend = legd)
  }else{
    #smooth de la courbe pour obtenir régression et tempérer variabilité due au TCL
    p = p + geom_point(size = 0.1, show.legend = legd)+ geom_smooth(show.legend = legd)
  }
  
  print(p)
}
extract_sub_mat = function(all_mat, list_ne){
  all_rows = c()
  for (ne in as.character(list_ne)) {
    all_rows = append(all_rows, which(all_mat[,1] == ne))
  }
  return(all_mat[all_rows,])
}

Population constante de tailles initiales différentes

Calcul des matrices de stats

seq_1024_16384 = 2**seq(10,14,1)
seq_50_1000 = seq(50,1000,1)
seq_tot = sort(c(seq_50_1000, seq_1024_16384))

mat_tot = data.frame.stat(seq_tot, 101, 1)
mat_50_1000 = extract_sub_mat(mat_tot, seq_50_1000)
mat_1024_16384 = extract_sub_mat(mat_tot, seq_1024_16384)

#mat_1024_16384_mean = data.frame.mean(mat_1024_16384, 101, seq_1024_16384)

Graphiques obtenus

plot_stat_gen(mat_tot, mat_tot$Generation, 
              mat_tot$Fst.s1.adm, mat_tot$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_50_1000, mat_50_1000$Generation, 
              mat_50_1000$Fst.s1.adm, mat_50_1000$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_1024_16384, mat_1024_16384$Generation, 
              mat_1024_16384$Fst.s1.adm, mat_1024_16384$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#plot_stat_gen(mat_1024_16384_mean, mat_1024_16384_mean$Generation, 
#              mat_1024_16384_mean$Fst.s1.adm, mat_1024_16384_mean$Ne,
#              "Stat en fonction des générations\n selon différentes Ne initiales")
#plot_stat_gen(mat_1024_16384_mean, mat_1024_16384_mean$Generation, 
#              mat_1024_16384_mean$mean.het.adm, mat_1024_16384_mean$Ne,
#              "Stat en fonction des générations\n selon différentes Ne initiales")
#plot_stat_gen(mat_64_8192, mat_64_8192$gen, 
#              mat_64_8192$mean.adm.props, mat_64_8192$Ne,
#              "Stat en fonction des générations\n selon différentes Ne initiales", #TRUE)

Populations croissantes

data.frame.increase = function(seq_ne_ini, seq_combi){
  for (ne_ini in seq_ne_ini) {
    if (dir.exists(str_c("Ne", ne_ini, "-XXX/"))) {
      dir_ne = str_c("Ne", ne_ini, "-XXX/")
      setwd(dir_ne)
      motif_detect = str_c("^",ne_ini,"-")
      vec_ne_tmp = seq_combi[which(str_detect(seq_combi, motif_detect))]
      mat_tmp = data.frame.stat(seq_ne = vec_ne_tmp, max_gen = 101, max_simu = 1)
      if (ne_ini == seq_ne_ini[1]) {
        mat_pop_inc = mat_tmp
      }else{
        mat_pop_inc = rbind(mat_pop_inc, mat_tmp)
      }
      setwd("../")
    }
  }
  return(na.omit(mat_pop_inc))
}
setwd("../new_methis_pop_increase_50000_snp/")
seq_ne_ini = seq(50,100,2)
seq_ne_fin = seq(100, 10000, 100)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]

mat_pop_inc = data.frame.increase(seq_ne_ini, seq_combi)


mat_end_10000 = extract_sub_mat(mat_pop_inc,
                                as.data.frame(outer(seq_ne_ini,
                                                    c("10000"),
                                                    FUN="paste",
                                                    sep="-"))[,1])
plot_stat_gen(mat_pop_inc, mat_pop_inc$Generation, 
              mat_pop_inc$f3, mat_pop_inc$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              FALSE, legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_pop_inc, mat_pop_inc$Generation, 
              mat_pop_inc$mean.het.adm, mat_pop_inc$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              FALSE, legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_end_10000, mat_end_10000$Generation, 
              mat_end_10000$mean.het.adm, mat_end_10000$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              legd = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Variation Nu

setwd("../new_methis_pop_increase_50000_snp_Nu_var/")
seq_nu = sprintf("%.2f", seq(0.05, 0.45, 0.05))
seq_ne_ini = seq(50,100,10)
seq_ne_fin = seq(100, 10000, 200)
seq_combi = as.data.frame(outer(seq_ne_ini, seq_ne_fin, FUN="paste", sep="-"))
seq_combi = as.data.frame.vector(unlist(seq_combi))[[1]]

list_df_nu = list()
cpt = 1

for (nu in seq_nu) {
  if(dir.exists(str_c("Nu", nu ))){
    dir_nu = str_c("Nu", nu, "/")
    setwd(dir_nu)
    list_df_nu[[cpt]] = data.frame.increase(seq_ne_ini, seq_combi)
    cpt = cpt+1
    setwd("../")
  }
}

for (combi_lst in 1:length(list_df_nu)) {
  if (combi_lst == 1) {
    mat_pop_nu = list_df_nu[[combi_lst]]
    mat_pop_nu = data.frame(mat_pop_nu, Nu = seq_nu[combi_lst])
  }else{
    mat_tmp = list_df_nu[[combi_lst]]
    mat_tmp = data.frame(mat_tmp, Nu = seq_nu[combi_lst])
    mat_pop_nu = rbind(mat_pop_nu, mat_tmp)
  }
}

mat_pop_nu$Nu = as.factor(mat_pop_nu$Nu)
plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation, 
              mat_pop_nu$mean.het.adm, mat_pop_nu$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              FALSE, legd = FALSE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation, 
              mat_pop_nu$mean.het.adm, mat_pop_nu$Nu,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              FALSE, legd = FALSE)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation, 
              mat_pop_nu$mean.het.adm, mat_pop_nu$Ne,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              TRUE, legd = FALSE)

plot_stat_gen(mat_pop_nu, mat_pop_nu$Generation, 
              mat_pop_nu$mean.het.adm, mat_pop_nu$Nu,
              "Stat en fonction des générations\n selon différentes Ne initiales",
              TRUE, legd = FALSE)

for (rg_list in 1:length(list_df_nu)) {
  plot_stat_gen(list_df_nu[[rg_list]], list_df_nu[[rg_list]]$Generation, 
              list_df_nu[[rg_list]]$mean.het.adm, list_df_nu[[rg_list]]$Ne,
              str_c("Nu = ",seq_nu[rg_list]), TRUE,legd = FALSE)

}

for (rg_list in 1:length(list_df_nu)) {
  
  list_ne = list_df_nu[[rg_list]]$Ne[which(list_df_nu[[rg_list]]$Generation == 100)]
  list_ne = as.integer(unlist(str_split(list_ne, "-")))
  list_ne0 = list_ne[seq(1, length(list_ne), 2)]
  list_nef = list_ne[seq(2, length(list_ne), 2)]
  mat_tmp = list_df_nu[[rg_list]][which(list_df_nu[[rg_list]]$Generation == 100),]
  
  scatter3D(list_ne0, list_nef, mat_tmp$mean.het.adm, type = "s", radius = 0.2,
            xlab = "Ne0", ylab = "Nef", zlab = "Stat")

}